Sentiment Analysis (IMDb) 4_SimpleRNN

A model built with SimpleRNN layers cannot learn long-term dependencies and is affected by vanishing or exploding gradients.
(Swapping the bidirectional LSTM layer from the previous part for a unidirectional SimpleRNN layer makes the gradients vanish: the loss stops decreasing. The sketch below illustrates why.)
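The vanishing-gradient intuition fits in a few lines. This is a minimal illustrative sketch, not from the original post; the 64 units and the 0.9 spectral norm are assumptions standing in for a SimpleRNN's recurrent Jacobian:

import numpy as np

rng = np.random.default_rng(1)
W_hh = rng.normal(size=(64, 64))
W_hh *= 0.9 / np.linalg.norm(W_hh, 2)    # scale the spectral norm to 0.9 (< 1)

grad = np.ones(64)                       # gradient arriving at the last time step
for t in range(100):                     # 100 steps, like max_seq_length below
    grad = W_hh.T @ grad                 # BPTT multiplies by the recurrent Jacobian each step
print(np.linalg.norm(grad))              # shrinks by ~0.9**100: effectively zero

With a spectral norm above 1 the same loop explodes instead; gated cells such as LSTM and GRU avoid this bare multiplicative chain.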

To get meaningful predictive performance out of a SimpleRNN on this dataset, the sequences have to be shortened.
(We keep the last part of each review, since the closing sentences of a movie review are the most likely to carry sentiment information; see the small example below.)
The preprocess_datasets() helper function chains the preprocessing steps together.
max_seq_length controls the sequence length of the datasets.
When set to None, the full sequences are used.
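A tiny example of the truncation rule used inside the helper (the token list is hypothetical):

tokens = ['the', 'movie', 'was', 'long', 'but', 'ultimately', 'great']
max_seq_length = 3
print(tokens[-max_seq_length:])   # ['but', 'ultimately', 'great'] -- the *end* of the review is kept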
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter

def preprocess_datasets(ds_raw_train, ds_raw_valid, ds_raw_test,
                        max_seq_length=None, batch_size=32):
    # Steps 1 & 2: tokenize the training texts and count the unique tokens
    tokenizer = tfds.deprecated.text.Tokenizer()
    token_counts = Counter()
    for example in ds_raw_train:
        tokens = tokenizer.tokenize(example[0].numpy()[0])
        if max_seq_length is not None:
            tokens = tokens[-max_seq_length:]
        token_counts.update(tokens)
    print('Vocab size:', len(token_counts))

    # Step 3: encode each text into a sequence of integer token ids
    encoder = tfds.deprecated.text.TokenTextEncoder(token_counts)

    def encode(text_tensor, label):
        text = text_tensor.numpy()[0]
        encoded_text = encoder.encode(text)
        if max_seq_length is not None:
            encoded_text = encoded_text[-max_seq_length:]
        return encoded_text, label

    def encode_map_fn(text, label):
        return tf.py_function(encode, inp=[text, label],
                              Tout=(tf.int64, tf.int64))

    ds_train = ds_raw_train.map(encode_map_fn)
    ds_valid = ds_raw_valid.map(encode_map_fn)
    ds_test = ds_raw_test.map(encode_map_fn)

    # Step 4: batch, padding every sequence to the longest one in its batch
    train_data = ds_train.padded_batch(batch_size, padded_shapes=([-1], []))
    valid_data = ds_valid.padded_batch(batch_size, padded_shapes=([-1], []))
    test_data = ds_test.padded_batch(batch_size, padded_shapes=([-1], []))

    return (train_data, valid_data, test_data, len(token_counts))
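Once the helper has been called (as done further below), the padded batches can be inspected; a small sketch, assuming train_data exists. Since padded_batch pads only up to the longest sequence in each batch, the time dimension can vary from batch to batch but never exceeds max_seq_length:

for seqs, labels in train_data.take(3):
    print(seqs.shape, labels.shape)   # e.g. (32, 100) (32,)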
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import GRU
def build_run_model(embedding_dim, vocab_size, recurrent_type='SimpleRNN',
                    n_recurrent_units=64, n_recurrent_layers=1,
                    bidirectional=True):
    tf.random.set_seed(1)
    model = tf.keras.Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                        name='embed_layer'))
    for i in range(n_recurrent_layers):
        # every recurrent layer except the last must return full sequences
        return_sequences = (i < n_recurrent_layers - 1)
        if recurrent_type == 'SimpleRNN':
            recurrent_layer = SimpleRNN(units=n_recurrent_units,
                                        return_sequences=return_sequences,
                                        name='simprnn-layer-{}'.format(i))
        elif recurrent_type == 'LSTM':
            recurrent_layer = LSTM(units=n_recurrent_units,
                                   return_sequences=return_sequences,
                                   name='lstm-layer-{}'.format(i))
        elif recurrent_type == 'GRU':
            recurrent_layer = GRU(units=n_recurrent_units,
                                  return_sequences=return_sequences,
                                  name='gru-layer-{}'.format(i))
        if bidirectional:
            recurrent_layer = Bidirectional(recurrent_layer,
                                            name='bidir-' + recurrent_layer.name)
        model.add(recurrent_layer)
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return model
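The helper makes the comparison mentioned at the top a one-liner. A sketch (vocab_size comes from the preprocessing step below; these variants are not trained in this post):

bi_lstm = build_run_model(20, vocab_size, recurrent_type='LSTM',
                          bidirectional=True)    # the earlier bidirectional LSTM
uni_rnn = build_run_model(20, vocab_size, recurrent_type='SimpleRNN',
                          bidirectional=False)   # the variant whose loss stalls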
Build the model with the helper functions
batch_size = 32
embedding_dim = 20
max_seq_length = 100

train_data, valid_data, test_data, n = preprocess_datasets(
    ds_raw_train, ds_raw_valid, ds_raw_test,
    max_seq_length=max_seq_length, batch_size=batch_size)

vocab_size = n + 2   # +2 for the padding (0) and out-of-vocabulary token ids
rnn_model = build_run_model(embedding_dim, vocab_size,
                            recurrent_type='SimpleRNN',
                            n_recurrent_units=64,
                            n_recurrent_layers=1,
                            bidirectional=True)
rnn_model.summary()
Vocab size: 58063

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embed_layer (Embedding)      (None, None, 20)          1161300
_________________________________________________________________
bidir-simprnn-layer-0 (Bidir (None, 128)               10880
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 65
=================================================================
Total params: 1,180,501
Trainable params: 1,180,501
Non-trainable params: 0
_________________________________________________________________
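The parameter counts in the summary can be verified by hand:

# embed_layer:           58065 * 20             = 1,161,300   (vocab_size * embedding_dim)
# bidir-simprnn-layer-0: 2 * 64 * (20 + 64 + 1) = 10,880      (input, recurrent, bias; both directions)
# dense_7:               128 * 64 + 64          = 8,256       (128 = 2 * 64 bidirectional outputs)
# dense_8:               64 * 1 + 1             = 65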
Compile, fit & evaluate
rnn_model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['accuracy'])
history = rnn_model.fit(train_data, validation_data=valid_data, epochs=10)

results = rnn_model.evaluate(test_data)
print('Test accuracy: {:.2f}%'.format(results[1] * 100))
Test accuracy: 73.56%
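To check that the truncated sequences really let the SimpleRNN learn (i.e. the loss now decreases instead of stalling), the returned history can be plotted. A minimal matplotlib sketch, not part of the original post:

import matplotlib.pyplot as plt

hist = history.history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(hist['loss'], label='train loss')
ax1.plot(hist['val_loss'], label='validation loss')
ax1.set_xlabel('epoch')
ax1.legend()
ax2.plot(hist['accuracy'], label='train acc')
ax2.plot(hist['val_accuracy'], label='validation acc')
ax2.set_xlabel('epoch')
ax2.legend()
plt.show()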